{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "from tqdm import tqdm\n",
    "from initial_dataSet import DataSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataSet: FrcSub\n"
     ]
    }
   ],
   "source": [
    "base_dir='./'\n",
    "dataSet_list = ('FrcSub','ASSIST_0910', 'ASSIST_2017','JUNYI', 'MathEC')\n",
    "\n",
    "dataSet_idx = 0\n",
    "data_set_name = dataSet_list[dataSet_idx]\n",
    "dataSet = DataSet(base_dir, data_set_name,build=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "item =dataSet.item\n",
    "item=item[~item.index.duplicated()]\n",
    "item_list, concept_list = [], []\n",
    "for idx in item.index:\n",
    "    now_concept_list = eval(item.loc[idx, 'knowledge_code'])\n",
    "    item_list.extend([idx] * len(now_concept_list))\n",
    "    concept_list.extend(now_concept_list)\n",
    "item_concept = pd.DataFrame({'item': item_list, 'concept': concept_list}).astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index(list_1,list_2):\n",
    "    index_x,index_y=[],[]\n",
    "    for i in list_1:\n",
    "        y=set(list_2)-set([i])\n",
    "        index_x.extend([i]*len(y))\n",
    "        index_y.extend(list(y))\n",
    "    return index_x,index_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([1, 1, 1, 2, 2, 4, 4, 4], [2, 5, 7, 5, 7, 2, 5, 7])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_a=np.array([1,2,4])\n",
    "list_b=np.array([2,5,7])\n",
    "get_index(list_a,list_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 536/536 [00:06<00:00, 80.98it/s] \n"
     ]
    }
   ],
   "source": [
    "record=dataSet.record\n",
    "conc_num=dataSet.concept_num\n",
    "stu_list=dataSet.total_stu_list\n",
    "\n",
    "knowledgeCorrect=torch.zeros((conc_num,conc_num)) # 记录知识点的相关程度\n",
    "conc_2_item_correct_nums=torch.zeros(conc_num) # 记录与知识点相关的习题答对了多少次\n",
    "for stu in tqdm(stu_list):\n",
    "    stu_record=record.loc[stu,['item_id','score']]\n",
    "    for i in range(len(stu_record)-1):\n",
    "        if stu_record.iloc[i,1]*stu_record.iloc[i+1,1]==1:\n",
    "            item_id_0=stu_record.iloc[i,0]\n",
    "            concepts_0=item_concept.set_index('item').loc[item_id_0,['concept']].values.reshape(-1)-1\n",
    "            item_id_1=stu_record.iloc[i+1,0]\n",
    "            concepts_1=item_concept.set_index('item').loc[item_id_1,['concept']].values.reshape(-1)-1\n",
    "            idx_0,idx_1=get_index(concepts_0,concepts_1)\n",
    "            knowledgeCorrect[idx_0,idx_1]+=1\n",
    "            conc_2_item_correct_nums[idx_0]+=1\n",
    "            conc_2_item_correct_nums[list(set(idx_0)-set(idx_1))]-=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "conc_2_item_correct_nums[conc_2_item_correct_nums==0]=1e10\n",
    "knowledgeDirected=knowledgeCorrect/conc_2_item_correct_nums.reshape(-1,1)\n",
    "knowledgeDirected[knowledgeCorrect<=0]=0\n",
    "\n",
    "temp0=knowledgeDirected[(1-torch.eye(conc_num)).bool()]\n",
    "min_c=temp0.min()\n",
    "max_c=temp0.max()\n",
    "\n",
    "o=torch.zeros_like(knowledgeDirected)\n",
    "o=(knowledgeDirected-min_c)/(max_c-min_c)\n",
    "o[knowledgeDirected<=0]=0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_o=(knowledgeDirected>0).sum()\n",
    "s_o=o.sum()\n",
    "\n",
    "avg = s_o / l_o\n",
    "threshold=avg**3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "relation=pd.DataFrame(o>threshold)\n",
    "relation.columns=relation.columns+1\n",
    "relation.index=relation.index+1\n",
    "\n",
    "relationship=relation.reset_index().melt(id_vars=['index'])\n",
    "relationship=relationship[relationship['value']==True].loc[:,['variable','index']]\n",
    "relationship.columns=['knowledge_code','parent']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "save_dir=dataSet.read_dir\n",
    "    \n",
    "relationship.to_csv(save_dir+'concept_relationship.csv',index=False)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "527a93331b4b1a8345148922acc34427fb7591433d63b66d32040b6fbbc6d593"
  },
  "kernelspec": {
   "display_name": "Python 3.8.12 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
